#### "Economic Inequality. Income and political participation in authoritarian regimes" ####
# authors: "Pelke, Lars"
# date: 2019-06-17
# written under "R version 3.6.0 (2019-03-11)"
# Multiple Imputation

#### Preliminaries ####

R.version$version.string

# clear workspace
rm(list=ls())

# set working directory

# loading packages

library(countrycode)
library(tidyverse)
library(viridis)
library(haven)
library(lme4)
library(sjstats)
library(texreg)
library(missForest)
library(mice)
library(VIM)
require(Amelia)

#### Supplementary Appendix E6 

#### Import Data ####
wvsdata <- readRDS("data/wvsdata.rds") 

#### Reducing to authoritarian regimes ####

wvsdata <- wvsdata %>%
  filter(v2x_regime_amb<=4)

wvsdata <- wvsdata %>%
  mutate(country_year = stringr::str_c(country_name, year, sep=" "))

table(wvsdata$country_year, is.na(wvsdata$gini_mkt))


#### Building variables ####

#Replacing missings in Dependent Variables

wvsdata <- wvsdata %>%
  filter(A064>=0 | A066>=0 | A067>=0 | A068>=0 | A069>=0 | A070>=0 |
           A072>=0 | A073>=0 | A074>=0 |A075>=0 | A076>=0 | A077>=0
         | A078>=0 | A079>=0 | A099>=0 | A100>=0 | A101>=0 | A102>=0
         | A103>=0 | A104>=0 | A105>=0 | A106>=0 | A106B>=0 | A106C>=0 )%>%
  mutate(civilsociety_b = if_else(A064==1, 1,
                                  if_else(A066==1, 1,
                                          if_else(A067==1, 1, 
                                                  if_else(A068==1, 1,
                                                          if_else(A069==1, 1, 
                                                                  if_else(A070==1, 1,
                                                                          if_else(A072==1, 1, 
                                                                                  if_else(A073==1, 1,
                                                                                          if_else(A074==1, 1,
                                                                                                  if_else(A075==1, 1,
                                                                                                          if_else(A076==1, 1, 
                                                                                                                  if_else(A077==1, 1,
                                                                                                                          if_else(A078==1, 1,
                                                                                                                                  if_else(A079==1, 1, 
                                                                                                                                          if_else(A099>=1, 1,
                                                                                                                                                  if_else(A100>=1, 1, 
                                                                                                                                                          if_else(A101>=1, 1,
                                                                                                                                                                  if_else(A102>=1, 1,
                                                                                                                                                                          if_else(A103>=1, 1,
                                                                                                                                                                                  if_else(A104>=1, 1, 
                                                                                                                                                                                          if_else(A105>=1, 1,
                                                                                                                                                                                                  if_else(A106>=1, 1,
                                                                                                                                                                                                          if_else(A106B>=1, 1, 
                                                                                                                                                                                                                  if_else(A106C>=1, 1, 0)))))))))))))))))))))))))

table(wvsdata$civilsociety_b)
sum(is.na(wvsdata$civilsociety_b))

#Repalcing missings in Independent Variables

wvsdata <- wvsdata %>%
  mutate(sex = ifelse(X001==1, 1,
                      ifelse(X001==2, 0, NA)))
table(wvsdata$sex)
sum(is.na(wvsdata$sex))

wvsdata <- wvsdata %>%
  mutate(age= X003) %>%
  mutate(age = ifelse(X003<0, NA, age))
summary(wvsdata$age)
sum(is.na(wvsdata$age))

wvsdata <- wvsdata %>%
  mutate(education= X025) %>%
  mutate(education = ifelse(X025<0, NA, education))
summary(wvsdata$education)
sum(is.na(wvsdata$education))

wvsdata <- wvsdata %>%
  mutate(income_deciles= X047) %>%
  mutate(income_deciles = ifelse(X047<0, NA, income_deciles))
summary(wvsdata$income_deciles)
sum(is.na(wvsdata$income_deciles))

wvsdata <- wvsdata %>%
  mutate(income_quintiles = ifelse(income_deciles==1, 1,
                                   ifelse(income_deciles==2, 1,
                                          ifelse(income_deciles==3, 2,
                                                 ifelse(income_deciles==4, 2,
                                                        ifelse(income_deciles==5, 3,
                                                               ifelse(income_deciles==6, 3,
                                                                      ifelse(income_deciles==7, 4,
                                                                             ifelse(income_deciles==8, 4,
                                                                                    ifelse(income_deciles==9, 5,
                                                                                           ifelse(income_deciles==10, 5, X047))))))))))) %>%
  mutate(income_quintiles = ifelse(X047<0, NA, income_quintiles))
summary(wvsdata$income_quintiles)
sum(is.na(wvsdata$income_quintiles))

wvsdata <- wvsdata %>%
  mutate(children= ifelse(X011>0, 1,
                          ifelse(X011==0, 0, X011))) %>%
  mutate(children = ifelse(X011<0, NA, children))
summary(wvsdata$children)
sum(is.na(wvsdata$children))

wvsdata <- wvsdata %>%
  mutate(unemployed= ifelse(X028==7, 1,
                            ifelse(X028>=1 & X028<7, 0,
                                   ifelse(X028==8, 0, X028)))) %>%
  mutate(unemployed = ifelse(X028<0, NA, unemployed))
summary(wvsdata$unemployed)
sum(is.na(wvsdata$unemployed))

###Data Manipulation of GINI measures, fill some missing value with last national observations

missing <- wvsdata %>% 
  group_by(country_year) %>% 
  summarise(sumNA = sum(is.na(gini_mkt)))
missing

wvsdata <- wvsdata %>%
  mutate(gini_mkt=ifelse(country_year=="Algeria 2014", 36.7, gini_mkt)) %>% # Algeria 2011
  mutate(gini_disp=ifelse(country_year=="Algeria 2014", 35, gini_disp)) %>%
  mutate(gini_mkt=ifelse(country_year=="Azerbaijan 2011", 38.3, gini_mkt)) %>% # Azerbaijan 2011
  mutate(gini_disp=ifelse(country_year=="Azerbaijan 2011", 30.3, gini_disp)) %>%
  mutate(gini_disp=ifelse(country_year=="Lebanon 2013", 38.5, gini_mkt)) %>% # Lebanon 2012
  mutate(gini_disp=ifelse(country_year=="Lebanon 2013", 36.5, gini_disp))

wvsdata <- wvsdata %>%
  mutate(gini_mkt=ifelse(country_year=="Haiti 2016", 57.6, gini_mkt)) %>%
  mutate(gini_disp=ifelse(country_year=="Haiti 2016", 53.7, gini_disp)) %>%
  mutate(gini_mkt=ifelse(country_year=="Nigeria 2012", 46.8, gini_mkt)) %>%
  mutate(gini_disp=ifelse(country_year=="Nigeria 2012", 44.1, gini_disp)) 

#### Data manipulation ####

wvsdata <- wvsdata %>%
  mutate(democratic_expericene_all = ifelse(is.na(democratic_expericene_all), 0, democratic_expericene_all), 
         democratic_expericene = ifelse(is.na(democratic_expericene), 0, democratic_expericene), 
         regime_support_group_max = ifelse(is.na(regime_support_group_max), 0, regime_support_group_max))



### delete Kuwait 2014, Uzbekistan 2011 and Libya 2014, no GINI information in a range of maximum 4 years before or after the country-year observations

##### Reducing to those country-years with macro-level observations ####

wvsdata <- wvsdata %>%
  drop_na(gini_mkt, gini_disp, v2csreprss, e_migdppcln)

summary(wvsdata$e_migdppcln)
summary(wvsdata$v2csreprss)
summary(wvsdata$gini_mkt)
summary(wvsdata$gini_disp)
summary(wvsdata$regime_support_group_max)
summary(wvsdata$democratic_expericene)

####################################################################################################

#### Inspect Data and patterns of missings ###
library(missForest)

# Generate 10% missing values at Random
wvsdata.mis <- prodNA(wvsdata, noNA = 0.1)

# Check missing values introduced in the data
summary(wvsdata.mis)

# remove categorical variables
wvsdata.mis <- subset(wvsdata.mis, select = c(sex, age, education, children,
                                              unemployed, income_quintiles))
summary(wvsdata.mis)

##load MICE library
library(mice)
library(VIM)

mice_plot <- aggr(wvsdata.mis, col=c('forestgreen','gray35'),
                  numbers=TRUE, sortVars=TRUE,
                  labels=names(wvsdata.mis), cex.axis=.55,
                  gap=3, ylab=c("Missing data","Pattern"))
mice_plot # education variables with more than 25 % missings, 
#then income deciles and income_quintiles with ~16% missings

#### AMELIA IMPUTATION ####

wvsdata_mi <- wvsdata %>%
  dplyr::select(S001, S002, S003, S020, S025, year, country_year, country_name,
         gini_mkt, gini_disp,
         e_migdppcln, v2csreprss , democratic_expericene, regime_support_group_max, sex, age, education, children,
         unemployed, income_quintiles, income_deciles, civilsociety_b)

wvsdata_mi <- as.data.frame(wvsdata_mi)

require(Amelia)
missmap(wvsdata.mis)

a.wvsdata <- amelia(wvsdata_mi, idvars=c("S001", "S002", "S003", "S020", "S025",
                                         "year", "country_name", "country_year", 
                                         "gini_disp", "gini_mkt",
                                         "v2csreprss", "e_migdppcln", "democratic_expericene", "regime_support_group_max", 
                                         "civilsociety_b"), m=5, 
                    ords=c("sex", "age", "education", "children", "unemployed", "income_quintiles", "income_deciles"), 
                    p2s=0)
summary(a.wvsdata)
summary(a.wvsdata$imputations[[1]])

#### Summary statistics for imputed dataset ####

countryyears <- a.wvsdata$imputations[[1]] %>%
  ungroup()%>%
  drop_na(civilsociety_b) %>%
  count(country_year)

countries <- a.wvsdata$imputations[[1]] %>%
  ungroup()%>%
  drop_na(civilsociety_b) %>%
  count(country_name)

rm(countryyears, countries, mice_plot, wvsdata.mis, wvsdata)

#### Constructing group mean and grand mean centered variables ####

#Author: Zoltan Fazekas
centFUN <- function(x) {
  x - mean(x, na.rm = TRUE)
}

# Use the function to grand-mean center the macro-level predictor
a.wvsdata <- transform(a.wvsdata, gini_mktCGM = centFUN(gini_mkt))
a.wvsdata <- transform(a.wvsdata, gini_dispCGM = centFUN(gini_disp))
a.wvsdata <- transform(a.wvsdata, v2csreprssCGM = centFUN(v2csreprss))
a.wvsdata <- transform(a.wvsdata, e_migdppclnCGM = centFUN(e_migdppcln))
a.wvsdata <- transform(a.wvsdata, regime_support_group_maxCGM = centFUN(regime_support_group_max))
a.wvsdata <- transform(a.wvsdata, democratic_expericeneCGM = centFUN(democratic_expericene))


## Use the fuction to group-mean center the individual predictors
# centering age
a.wvsdata$imputations[[1]] <- a.wvsdata$imputations[[1]] %>%
  group_by(S025) %>%
  mutate(ageCWC = centFUN(age))
a.wvsdata$imputations[[2]] <- a.wvsdata$imputations[[2]] %>%
  group_by(S025) %>%
  mutate(ageCWC = centFUN(age))
a.wvsdata$imputations[[3]] <- a.wvsdata$imputations[[3]] %>%
  group_by(S025) %>%
  mutate(ageCWC = centFUN(age))
a.wvsdata$imputations[[4]] <- a.wvsdata$imputations[[4]] %>%
  group_by(S025) %>%
  mutate(ageCWC = centFUN(age))
a.wvsdata$imputations[[5]] <- a.wvsdata$imputations[[5]] %>%
  group_by(S025) %>%
  mutate(ageCWC = centFUN(age))

# centering income_deciles, income_quintiles and education 

a.wvsdata$imputations[[1]] <- a.wvsdata$imputations[[1]] %>%
  group_by(S025) %>%
  mutate(educationCWC = centFUN(as.numeric(education)),
         income_quintilesCWC = centFUN(as.numeric(income_quintiles)),
         income_decilesCWC = centFUN(as.numeric(income_deciles)))
a.wvsdata$imputations[[2]] <- a.wvsdata$imputations[[2]] %>%
  group_by(S025) %>%
  mutate(educationCWC = centFUN(as.numeric(education)),
         income_quintilesCWC = centFUN(as.numeric(income_quintiles)),
         income_decilesCWC = centFUN(as.numeric(income_deciles)))
a.wvsdata$imputations[[3]] <- a.wvsdata$imputations[[3]] %>%
  group_by(S025) %>%
  mutate(educationCWC = centFUN(as.numeric(education)),
         income_quintilesCWC = centFUN(as.numeric(income_quintiles)),
         income_decilesCWC = centFUN(as.numeric(income_deciles)))
a.wvsdata$imputations[[4]] <- a.wvsdata$imputations[[4]] %>%
  group_by(S025) %>%
  mutate(educationCWC = centFUN(as.numeric(education)),
         income_quintilesCWC = centFUN(as.numeric(income_quintiles)),
         income_decilesCWC = centFUN(as.numeric(income_deciles)))
a.wvsdata$imputations[[5]] <- a.wvsdata$imputations[[5]] %>%
  group_by(S025) %>%
  mutate(educationCWC = centFUN(as.numeric(education)),
         income_quintilesCWC = centFUN(as.numeric(income_quintiles)),
         income_decilesCWC = centFUN(as.numeric(income_deciles)))

summary(a.wvsdata$imputations[[5]])

# Rescaling predicator variables
a.wvsdata$imputations[[1]]$ageCWC <- a.wvsdata$imputations[[1]]$ageCWC/10
a.wvsdata$imputations[[2]]$ageCWC <- a.wvsdata$imputations[[2]]$ageCWC/10
a.wvsdata$imputations[[3]]$ageCWC <- a.wvsdata$imputations[[3]]$ageCWC/10
a.wvsdata$imputations[[4]]$ageCWC <- a.wvsdata$imputations[[4]]$ageCWC/10
a.wvsdata$imputations[[5]]$ageCWC <- a.wvsdata$imputations[[5]]$ageCWC/10
summary(a.wvsdata$imputations[[5]]$ageCWC)

##################################
#Descriptive Statistics
##################################

count <- a.wvsdata$imputations[[5]] %>%
  group_by(S025, income_quintiles) %>%
  tally() 

a.wvsdata$imputations[[5]] <- a.wvsdata$imputations[[5]] %>%
  group_by(S025, income_quintiles) %>%
  mutate(civil_byincome = mean(civilsociety_b))

a.wvsdata$imputations[[5]]  <- merge.data.frame(a.wvsdata$imputations[[5]] , count)

rm(count)

ggplot(data=a.wvsdata$imputations[[5]], aes(x=income_quintiles, y = civil_byincome/n))+
  geom_col() +
  facet_wrap(~country_year, ncol = 5) +
  theme_bw() +
  xlab("Income Quintiles") +
  ylab("Civil Society Participation (Yes)") +
  scale_y_continuous(labels = scales::percent) +
  labs(y = "Average Participation in civil society by income groups",
       x = "Income Quintiles",
       title = "",
       caption = "Data from World Value Survey and V-Dem dataset, version 9, based on MI Dataset")

ggsave("output/C8IncomeQuintiles.pdf", width = 22 , height = 25 , units = c("cm"))

##Summary statistics ##
##Summary statistics ##
library(stargazer)

cols <- c("sex", "ageCWC", "educationCWC", "children", "unemployed", "income_quintilesCWC",
          "gini_mktCGM", "gini_dispCGM", "v2csreprssCGM", "e_migdppclnCGM", 
          "regime_support_group_maxCGM", "democratic_expericeneCGM", "civilsociety_b")

stargazer(
  title="Summary Statistics for Electoral Participation Dataset", 
  covariate.labels=c("Male","Age (centered)", "Education (centered)", "Children", "Unemployed",
                     "Income Quintiles","Market Gini (c)","Disp. Gini (c)" , 
                     "CSO repression (c)", "GDP pc log (c)", "Regime Support Group", "Democratic Experience" ,"Civil Society Particaption"),
  as.data.frame(a.wvsdata$imputations[[5]][, cols]), type = "latex", 
  summary.stat = c("n", "mean", "median", "min", "max", "sd")
)

################################################
#### Analysis ####
################################################

## Loading necessary packages ##
library(tidyverse)
library(Amelia)
library(broom)
library(lme4)

## Build one data frame for regression analysis ##

all_imputations <- bind_rows(unclass(a.wvsdata$imputations), .id = "m") %>%
  group_by(m) %>%
  nest()

all_imputations

## Model 1

v1 <- all_imputations %>%
  mutate(model = data %>% map(~ glmer(civilsociety_b ~ 1 + (1 | S025) + (1 | S003) ,
                                      data = .,
                                      na.action = na.omit,
                                      family = binomial(link = "logit"),
                                      control=glmerControl(optCtrl=list(maxfun=6000)))),
         tidied = model %>% map(~ tidy(., conf.int = TRUE)),
         glance = model %>% map(~ glance(.)))
v1

# Create a wide data frame of just the coefficients and standard errors

v1_par <- v1 %>%
  unnest(tidied) %>%
  select(m, term, estimate, std.error) %>%
  gather(key, value, estimate, std.error) %>%
  group_by(term, m) %>%
  mutate(obs = row_number()) %>%
  spread(term, value) 

v1_par
v1_par <- v1_par %>%
  drop_na(`(Intercept)`) %>%
  select(-obs)

# Extract the coefficients
v1_coefs <- v1_par %>%
  filter(key == "estimate") %>%
  ungroup() %>%
  select(-m, -key)
v1_coefs

# Extract the standard errors
v1_ses <- v1_par %>%
  filter(key == "std.error") %>%
  ungroup() %>%
  select(-m, -key)


#Mi meld function Amelia
v1_melded <- mi.meld(v1_coefs, v1_ses)
v1_melded

# Regression coefficient table
v1_model_dg <- v1 %>%
  unnest(glance) %>%
  filter(m == "imp1") %>%
  pull(df.residual)

v1_table <- as.data.frame(cbind(t(v1_melded$q.mi),
                                t(v1_melded$se.mi))) %>%
  magrittr::set_colnames(c("estimate", "std.error")) %>%
  mutate(term = rownames(.)) %>%
  select(term, everything(.)) %>%
  mutate(statistic = estimate / std.error,
         conf.low = estimate + std.error * qt(0.025, v1_model_dg),
         conf.high = estimate + std.error * qt(0.975, v1_model_dg),
         p.value = 2 * pt(abs(statistic), v1_model_dg, lower.tail = FALSE))

v1_table

v1_data <- v1_table %>%
  drop_na()

## Model 2 ##

v2 <- all_imputations %>%
  mutate(model = data %>% map(~ glmer(civilsociety_b ~ sex + ageCWC+ educationCWC + children + 
                                        unemployed + income_quintilesCWC + (1 | S025) + (1 | S003) ,
                                      data = .,
                                      na.action = na.omit,
                                      family = binomial(link = "logit"),
                                      control=glmerControl(optCtrl=list(maxfun=6000)))),
         tidied = model %>% map(~ tidy(., conf.int = TRUE)),
         glance = model %>% map(~ glance(.)))
v2

# Create a wide data frame of just the coefficients and standard errors

v2_par <- v2 %>%
  unnest(tidied) %>%
  select(m, term, estimate, std.error) %>%
  gather(key, value, estimate, std.error) %>%
  group_by(term, m) %>%
  mutate(obs = row_number()) %>%
  spread(term, value) 

v2_par
v2_par <- v2_par %>%
  drop_na(`ageCWC`) %>%
  select(-obs)

# Extract the coefficients
v2_coefs <- v2_par %>%
  filter(key == "estimate") %>%
  ungroup() %>%
  select(-m, -key)
v2_coefs

# Extract the standard errors
v2_ses <- v2_par %>%
  filter(key == "std.error") %>%
  ungroup() %>%
  select(-m, -key)


#Mi meld function Amelia
v2_melded <- mi.meld(v2_coefs, v2_ses)
v2_melded 

#Regression coefficient table

v2_model_dg <- v2 %>%
  unnest(glance) %>%
  filter(m == "imp1") %>%
  pull(df.residual)

v2_table <- as.data.frame(cbind(t(v2_melded$q.mi),
                                t(v2_melded$se.mi))) %>%
  magrittr::set_colnames(c("estimate", "std.error")) %>%
  mutate(term = rownames(.)) %>%
  select(term, everything()) %>%
  mutate(statistic = estimate / std.error,
         conf.low = estimate + std.error * qt(0.025, v2_model_dg),
         conf.high = estimate + std.error * qt(0.975, v2_model_dg),
         p.value = 2 * pt(abs(statistic), v2_model_dg, lower.tail = FALSE))

v2_table

v2_data <- v2_table %>%
  drop_na()
v2_data

## Model 3 ##

v3 <- all_imputations %>%
  mutate(model = data %>% map(~ glmer(civilsociety_b ~ sex + ageCWC+ educationCWC + children + 
                                        unemployed + income_quintilesCWC + 
                                        gini_mktCGM + (1 | S025) + (1 | S003) ,
                                      data = .,
                                      na.action = na.omit,
                                      family = binomial(link = "logit"),
                                      control=glmerControl(optCtrl=list(maxfun=6000)))),
         tidied = model %>% map(~ tidy(., conf.int = TRUE)),
         glance = model %>% map(~ glance(.)))
v3

#Inspect the data for imputation dataset 1
v3 %>%
  filter(m == "imp1") %>%
  unnest(tidied)

# Create a wide data frame of just the coefficients and standard errors

v3_par <- v3 %>%
  unnest(tidied) %>%
  select(m, term, estimate, std.error) %>%
  gather(key, value, estimate, std.error) %>%
  group_by(term, m) %>%
  mutate(obs = row_number()) %>%
  spread(term, value) 

v3_par
v3_par <- v3_par %>%
  drop_na(`ageCWC`) %>%
  select(-obs)

# Extract the coefficients
v3_coefs <- v3_par %>%
  filter(key == "estimate") %>%
  ungroup() %>%
  select(-m, -key)
v3_coefs

# Extract the standard errors
v3_ses <- v3_par %>%
  filter(key == "std.error") %>%
  ungroup() %>%
  select(-m, -key)


#Mi meld function Amelia
v3_melded <- mi.meld(v3_coefs, v3_ses)
v3_melded 

#Regression coefficient table

v3_model_dg <- v3 %>%
  unnest(glance) %>%
  filter(m == "imp1") %>%
  pull(df.residual)

v3_table <- as.data.frame(cbind(t(v3_melded$q.mi),
                                t(v3_melded$se.mi))) %>%
  magrittr::set_colnames(c("estimate", "std.error")) %>%
  mutate(term = rownames(.)) %>%
  select(term, everything()) %>%
  mutate(statistic = estimate / std.error,
         conf.low = estimate + std.error * qt(0.025, v3_model_dg),
         conf.high = estimate + std.error * qt(0.975, v3_model_dg),
         p.value = 2 * pt(abs(statistic), v3_model_dg, lower.tail = FALSE))

v3_table

v3_data <- v3_table %>%
  drop_na()
v3_data

## Model 4 ##

v4 <- all_imputations %>%
  mutate(model = data %>% map(~ glmer(civilsociety_b ~ sex + ageCWC+ educationCWC + children + 
                                        unemployed + income_quintilesCWC + 
                                        gini_mktCGM + v2csreprssCGM + e_migdppclnCGM + 
                                        democratic_expericeneCGM + regime_support_group_maxCGM +
                                        (1 | S025) + (1 | S003) ,
                                      data = .,
                                      na.action = na.omit,
                                      family = binomial(link = "logit"),
                                      control=glmerControl(optCtrl=list(maxfun=6000)))),
         tidied = model %>% map(~ tidy(., conf.int = TRUE)),
         glance = model %>% map(~ glance(.)))
v4

#Inspect the data for imputation dataset 1
v4 %>%
  filter(m == "imp1") %>%
  unnest(tidied)

# Create a wide data frame of just the coefficients and standard errors

v4_par <- v4 %>%
  unnest(tidied) %>%
  select(m, term, estimate, std.error) %>%
  gather(key, value, estimate, std.error) %>%
  group_by(term, m) %>%
  mutate(obs = row_number()) %>%
  spread(term, value) 

v4_par
v4_par <- v4_par %>%
  drop_na(`ageCWC`) %>%
  select(-obs)


# Extract the coefficients
v4_coefs <- v4_par %>%
  filter(key == "estimate") %>%
  ungroup() %>%
  select(-m, -key)
v4_coefs

# Extract the standard errors
v4_ses <- v4_par %>%
  filter(key == "std.error") %>%
  ungroup() %>%
  select(-m, -key)
v4_ses

#Mi meld function Amelia
v4_melded <- mi.meld(v4_coefs, v4_ses)
v4_melded 

#Regression coefficient table

v4_model_dg <- v4 %>%
  unnest(glance) %>%
  filter(m == "imp1") %>%
  pull(df.residual)

v4_table <- as.data.frame(cbind(t(v4_melded$q.mi),
                                t(v4_melded$se.mi))) %>%
  magrittr::set_colnames(c("estimate", "std.error")) %>%
  mutate(term = rownames(.)) %>%
  select(term, everything()) %>%
  mutate(statistic = estimate / std.error,
         conf.low = estimate + std.error * qt(0.025, v4_model_dg),
         conf.high = estimate + std.error * qt(0.975, v4_model_dg),
         p.value = 2 * pt(abs(statistic), v4_model_dg, lower.tail = FALSE))

v4_table

v4_data <- v4_table %>%
  drop_na()
v4_data

## Model 5 ##

v5 <- all_imputations %>%
  mutate(model = data %>% map(~ glmer(civilsociety_b ~ sex + ageCWC+ educationCWC + children + 
                                        unemployed + income_quintilesCWC + 
                                        gini_mktCGM + v2csreprssCGM + e_migdppclnCGM + 
                                        democratic_expericeneCGM + regime_support_group_maxCGM +
                                        (1 + income_quintilesCWC| S025) + (1 | S003),
                                      data = .,
                                      na.action = na.omit,
                                      family = binomial(link = "logit"),
                                      control=glmerControl(optCtrl=list(maxfun=6000)))),
         tidied = model %>% map(~ tidy(., conf.int = TRUE)),
         glance = model %>% map(~ glance(.)))
v5

#Inspect the data for imputation dataset 1
v5 %>%
  filter(m == "imp1") %>%
  unnest(tidied)

# Create a wide data frame of just the coefficients and standard errors
v5_par <- v5 %>%
  unnest(tidied) %>%
  select(m, term, estimate, std.error) %>%
  gather(key, value, estimate, std.error) %>%
  group_by(term, m) %>%
  mutate(obs = row_number()) %>%
  spread(term, value) 

v5_par
v5_par <- v5_par %>%
  drop_na(`ageCWC`) %>%
  select(-obs)

# Extract the coefficients
v5_coefs <- v5_par %>%
  filter(key == "estimate") %>%
  ungroup() %>%
  select(-m, -key)
v5_coefs

# Extract the standard errors
v5_ses <- v5_par %>%
  filter(key == "std.error") %>%
  ungroup() %>%
  select(-m, -key)
v5_ses

#Mi meld function Amelia
v5_melded <- mi.meld(v5_coefs, v5_ses)
v5_melded 

#Regression coefficient table

v5_model_dg <- v5 %>%
  unnest(glance) %>%
  filter(m == "imp1") %>%
  pull(df.residual)

v5_table <- as.data.frame(cbind(t(v5_melded$q.mi),
                                t(v5_melded$se.mi))) %>%
  magrittr::set_colnames(c("estimate", "std.error")) %>%
  mutate(term = rownames(.)) %>%
  select(term, everything()) %>%
  mutate(statistic = estimate / std.error,
         conf.low = estimate + std.error * qt(0.025, v5_model_dg),
         conf.high = estimate + std.error * qt(0.975, v5_model_dg),
         p.value = 2 * pt(abs(statistic), v5_model_dg, lower.tail = FALSE))

v5_table

v5_data <- v5_table %>%
  drop_na()
v5_data

## Model 6 ##

v6 <- all_imputations %>%
  mutate(model = data %>% map(~ glmer(civilsociety_b ~ sex + ageCWC+ educationCWC + children + 
                                        unemployed + income_quintilesCWC +
                                        gini_mktCGM + v2csreprssCGM + e_migdppclnCGM + 
                                        democratic_expericeneCGM + regime_support_group_maxCGM +
                                        income_quintilesCWC*gini_mktCGM + v2csreprssCGM*income_quintilesCWC +
                                        (1 + income_quintilesCWC| S025) + (1 | S003),
                                      data = .,
                                      na.action = na.omit,
                                      family = binomial(link = "logit"),
                                      control=glmerControl(optCtrl=list(maxfun=6000)))),
         tidied = model %>% map(~ tidy(., conf.int = TRUE)),
         glance = model %>% map(~ glance(.)))
v6

#Inspect the data for imputation dataset 1
v6 %>%
  filter(m == "imp1") %>%
  unnest(tidied)

# Create a wide data frame of just the coefficients and standard errors
v6_par <- v6 %>%
  unnest(tidied) %>%
  select(m, term, estimate, std.error) %>%
  gather(key, value, estimate, std.error) %>%
  group_by(term, m) %>%
  mutate(obs = row_number()) %>%
  spread(term, value) 

v6_par
v6_par <- v6_par %>%
  drop_na(`ageCWC`) %>%
  select(-obs)

# Extract the coefficients
v6_coefs <- v6_par %>%
  filter(key == "estimate") %>%
  ungroup() %>%
  select(-m, -key)
v6_coefs

# Extract the standard errors
v6_ses <- v6_par %>%
  filter(key == "std.error") %>%
  ungroup() %>%
  select(-m, -key)
v6_ses

#Mi meld function Amelia
v6_melded <- mi.meld(v6_coefs, v6_ses)
v6_melded 

#Regression coefficient table

v6_model_dg <- v6 %>%
  unnest(glance) %>%
  filter(m == "imp1") %>%
  pull(df.residual)

v6_table <- as.data.frame(cbind(t(v6_melded$q.mi),
                                t(v6_melded$se.mi))) %>%
  magrittr::set_colnames(c("estimate", "std.error")) %>%
  mutate(term = rownames(.)) %>%
  select(term, everything()) %>%
  mutate(statistic = estimate / std.error,
         conf.low = estimate + std.error * qt(0.025, v6_model_dg),
         conf.high = estimate + std.error * qt(0.975, v6_model_dg),
         p.value = 2 * pt(abs(statistic), v6_model_dg, lower.tail = FALSE))

v6_table
v6_data <- v6_table %>%
  drop_na()
v6_data

## Model 7 ##

v7 <- all_imputations %>%
  mutate(model = data %>% map(~ glmer(civilsociety_b ~ sex + ageCWC+ educationCWC + children + 
                                        unemployed + income_quintilesCWC +
                                        v2csreprssCGM + e_migdppclnCGM + 
                                        democratic_expericeneCGM + regime_support_group_maxCGM +
                                        income_quintilesCWC*v2csreprssCGM +
                                        (1 + income_quintilesCWC| S025) + (1 | S003),
                                      data = .,
                                      na.action = na.omit,
                                      family = binomial(link = "logit"),
                                      control=glmerControl(optCtrl=list(maxfun=6000)))),
         tidied = model %>% map(~ tidy(., conf.int = TRUE)),
         glance = model %>% map(~ glance(.)))
v7

#Inspect the data for imputation dataset 1
v7 %>%
  filter(m == "imp1") %>%
  unnest(tidied)

# Create a wide data frame of just the coefficients and standard errors
v7_par <- v7 %>%
  unnest(tidied) %>%
  select(m, term, estimate, std.error) %>%
  gather(key, value, estimate, std.error) %>%
  group_by(term, m) %>%
  mutate(obs = row_number()) %>%
  spread(term, value) 

v7_par
v7_par <- v7_par %>%
  drop_na(`ageCWC`) %>%
  select(-obs)

# Extract the coefficients
v7_coefs <- v7_par %>%
  filter(key == "estimate") %>%
  ungroup() %>%
  select(-m, -key)
v7_coefs

# Extract the standard errors
v7_ses <- v7_par %>%
  filter(key == "std.error") %>%
  ungroup() %>%
  select(-m, -key)
v7_ses

#Mi meld function Amelia
v7_melded <- mi.meld(v7_coefs, v7_ses)
v7_melded 

#Regression coefficient table

v7_model_dg <- v7 %>%
  unnest(glance) %>%
  filter(m == "imp1") %>%
  pull(df.residual)

v7_table <- as.data.frame(cbind(t(v7_melded$q.mi),
                                t(v7_melded$se.mi))) %>%
  magrittr::set_colnames(c("estimate", "std.error")) %>%
  mutate(term = rownames(.)) %>%
  select(term, everything()) %>%
  mutate(statistic = estimate / std.error,
         conf.low = estimate + std.error * qt(0.025, v7_model_dg),
         conf.high = estimate + std.error * qt(0.975, v7_model_dg),
         p.value = 2 * pt(abs(statistic), v7_model_dg, lower.tail = FALSE))

v7_table
v7_data <- v7_table %>%
  drop_na()
v7_data

#### Dot-Whisker Plot ####

library(dotwhisker)
library(broom.mixed)
library(dplyr)
library(tidyr)

v1_df <- v1_data %>%
  by_2sd(a.wvsdata[[1]]) %>%
  arrange(term) %>%
  mutate(model="Model 1") %>%
  relabel_predictors(c(sex = "Male",                       
                       ageCWC = "Age (centered)", 
                       educationCWC = "Education (centered)", 
                       children = "Children", 
                       unemployed = "Unemployed", 
                       income_quintilesCWC = "Income (centered)",
                       gini_mktCGM = "Market Inequality (centered)",
                       v2csreprssCGM = "CSO repression (centered)",
                       e_migdppclnCGM = "GDP pc log (centered)",
                       democratic_expericeneCGM = "Previos Dem. Experience (centered)", 
                       regime_support_group_maxCGM = "Regime Support Group (centered)",
                       "income_quintilesCWC:gini_mktCGM" = "Income * Inequality",
                       "income_quintilesCWC:v2csreprssCGM" = "Income * CSO repression"))

v2_df <- v2_data %>%
  by_2sd(a.wvsdata[[1]]) %>%
  arrange(term) %>%
  mutate(model="Model 2") %>%
  relabel_predictors(c(sex = "Male",                       
                       ageCWC = "Age (centered)", 
                       educationCWC = "Education (centered)", 
                       children = "Children", 
                       unemployed = "Unemployed", 
                       income_quintilesCWC = "Income (centered)",
                       gini_mktCGM = "Market Inequality (centered)",
                       v2csreprssCGM = "CSO repression (centered)",
                       e_migdppclnCGM = "GDP pc log (centered)",
                       democratic_expericeneCGM = "Previos Dem. Experience (centered)", 
                       regime_support_group_maxCGM = "Regime Support Group (centered)",
                       "income_quintilesCWC:gini_mktCGM" = "Income * Inequality",
                       "income_quintilesCWC:v2csreprssCGM" = "Income * CSO repression"))

v3_df <- v3_data %>%
  by_2sd(a.wvsdata[[1]]) %>%
  arrange(term) %>%
  mutate(model="Model 3") %>%
  relabel_predictors(c(sex = "Male",                       
                       ageCWC = "Age (centered)", 
                       educationCWC = "Education (centered)", 
                       children = "Children", 
                       unemployed = "Unemployed", 
                       income_quintilesCWC = "Income (centered)",
                       gini_mktCGM = "Market Inequality (centered)",
                       v2csreprssCGM = "CSO repression (centered)",
                       e_migdppclnCGM = "GDP pc log (centered)",
                       democratic_expericeneCGM = "Previos Dem. Experience (centered)", 
                       regime_support_group_maxCGM = "Regime Support Group (centered)",
                       "income_quintilesCWC:gini_mktCGM" = "Income * Inequality",
                       "income_quintilesCWC:v2csreprssCGM" = "Income * CSO repression"))

v4_df <- v4_data %>%
  by_2sd(a.wvsdata[[1]]) %>%
  arrange(term) %>%
  mutate(model="Model 4") %>%
  relabel_predictors(c(sex = "Male",                       
                       ageCWC = "Age (centered)", 
                       educationCWC = "Education (centered)", 
                       children = "Children", 
                       unemployed = "Unemployed", 
                       income_quintilesCWC = "Income (centered)",
                       gini_mktCGM = "Market Inequality (centered)",
                       v2csreprssCGM = "CSO repression (centered)",
                       e_migdppclnCGM = "GDP pc log (centered)",
                       democratic_expericeneCGM = "Previos Dem. Experience (centered)", 
                       regime_support_group_maxCGM = "Regime Support Group (centered)",
                       "income_quintilesCWC:gini_mktCGM" = "Income * Inequality",
                       "income_quintilesCWC:v2csreprssCGM" = "Income * CSO repression"))

v5_df <- v5_data %>%
  by_2sd(a.wvsdata[[1]]) %>%
  arrange(term) %>%
  mutate(model="Model 5") %>%
  relabel_predictors(c(sex = "Male",                       
                       ageCWC = "Age (centered)", 
                       educationCWC = "Education (centered)", 
                       children = "Children", 
                       unemployed = "Unemployed", 
                       income_quintilesCWC = "Income (centered)",
                       gini_mktCGM = "Market Inequality (centered)",
                       v2csreprssCGM = "CSO repression (centered)",
                       e_migdppclnCGM = "GDP pc log (centered)",
                       democratic_expericeneCGM = "Previos Dem. Experience (centered)", 
                       regime_support_group_maxCGM = "Regime Support Group (centered)",
                       "income_quintilesCWC:gini_mktCGM" = "Income * Inequality",
                       "income_quintilesCWC:v2csreprssCGM" = "Income * CSO repression"))

v6_df <- v6_data %>%
  by_2sd(a.wvsdata[[1]]) %>%
  arrange(term) %>%
  mutate(model="Model 6") %>%
  relabel_predictors(c(sex = "Male",                       
                       ageCWC = "Age (centered)", 
                       educationCWC = "Education (centered)", 
                       children = "Children", 
                       unemployed = "Unemployed", 
                       income_quintilesCWC = "Income (centered)",
                       gini_mktCGM = "Market Inequality (centered)",
                       v2csreprssCGM = "CSO repression (centered)",
                       e_migdppclnCGM = "GDP pc log (centered)",
                       democratic_expericeneCGM = "Previos Dem. Experience (centered)", 
                       regime_support_group_maxCGM = "Regime Support Group (centered)",
                       "income_quintilesCWC:gini_mktCGM" = "Income * Inequality",
                       "income_quintilesCWC:v2csreprssCGM" = "Income * CSO repression"))


# Manipulating Interaction values manually

v6_df <- v6_df %>%
  mutate(estimate = ifelse(term=="Income * Inequality", -0.0005179444, estimate)) %>%
  mutate(std.error = ifelse(term=="Income * Inequality", -0.0005179444, std.error)) %>%
  mutate(conf.low = ifelse(term=="Income * Inequality", -0.007689194, conf.low)) %>%
  mutate(conf.high = ifelse(term=="Income * Inequality",0.006653305, conf.high)) %>%
  mutate(estimate = ifelse(term=="Income * CSO repression", -0.0239684990, estimate)) %>%
  mutate(std.error = ifelse(term=="Income * CSO repression", 0.032249774, std.error)) %>%
  mutate(conf.low = ifelse(term=="Income * CSO repression", -0.087177698, conf.low)) %>%
  mutate(conf.high = ifelse(term=="Income * CSO repression", 0.039240700, conf.high)) 

v7_df <- v7_data %>%
  by_2sd(a.wvsdata[[1]]) %>%
  arrange(term) %>%
  mutate(model="Model 7") %>%
  relabel_predictors(c(sex = "Male",                       
                       ageCWC = "Age (centered)", 
                       educationCWC = "Education (centered)", 
                       children = "Children", 
                       unemployed = "Unemployed", 
                       income_quintilesCWC = "Income (centered)",
                       gini_mktCGM = "Market Inequality (centered)",
                       v2csreprssCGM = "CSO repression (centered)",
                       e_migdppclnCGM = "GDP pc log (centered)",
                       democratic_expericeneCGM = "Previos Dem. Experience (centered)", 
                       regime_support_group_maxCGM = "Regime Support Group (centered)",
                       "income_quintilesCWC:gini_mktCGM" = "Income * Inequality",
                       "income_quintilesCWC:v2csreprssCGM" = "Income * CSO repression"))

v7_df <- v7_df %>%
    mutate(estimate = ifelse(term=="Income * CSO repression",-0.02446087, estimate)) %>%
  mutate(std.error = ifelse(term=="Income * CSO repression", 0.032019227, std.error)) %>%
  mutate(conf.low = ifelse(term=="Income * CSO repression", -0.08721820, conf.low)) %>%
  mutate(conf.high = ifelse(term=="Income * CSO repression", 0.03829646, conf.high)) 


#Dot-Whisker Plot 

v_models <- rbind(v2_df, v3_df, v4_df, v5_df, v6_df, v7_df)

v_brackets <- list(c("Individual Predictors", "Male", "Income (centered)"), 
                   c("Country-year Predictors", "Market Inequality (centered)", "Regime Support Group (centered)"),
                   c("Cross level Interactions", "Income * Inequality" , "Income * CSO repression"))

v_plot <- {dwplot(v_models) +
    theme_bw() + xlab("Coefficient Estimate") + ylab("") +
    geom_vline(xintercept = 0, colour = "grey60", linetype = 2) +
    ggtitle("Predicting Civil Society Participation, Multiple Imputation Model") +
    theme(plot.title = element_text(face="bold"),
          legend.position = c(0.85, 0.8),
          legend.justification = c(0, 0), 
          legend.background = element_rect(colour="grey80"),
          legend.title = element_blank())} %>%
  add_brackets(v_brackets)

v_plot
ggsave("output/C8dotwhisker.pdf", width = 22 , height = 25 , units = c("cm"))

## Extract Texreg Table ##

extract_broom <- function(tidy_model, glance_model) {
  # get estimates/standard errors from tidy
  coef <- tidy_model$estimate
  coef.names <- as.character(tidy_model$term)
  se <- tidy_model$std.error
  pvalues <- tidy_model$p.value
  # get goodness-of-fit statistics from glance
  glance_transposed <- as_tibble(cbind(name = names(glance_model), t(glance_model)))
  gof.names <- as.character(glance_transposed$name)
  gof <- as.double(glance_transposed$value)
  gof.decimal <- gof %% 1 > 0
  tr_object <- texreg::createTexreg(coef.names = coef.names,
                                    coef = coef,
                                    se = se,
                                    pvalues = pvalues,
                                    gof.names = gof.names,
                                    gof = gof,
                                    gof.decimal = gof.decimal)
  return(tr_object)
}

extract_broom <- function(tidy_model, glance_model) {
  # get estimates/standard errors from tidy
  coef <- tidy_model$estimate
  coef.names <- as.character(tidy_model$term)
  se <- tidy_model$std.error
  pvalues <- tidy_model$p.value
  tr_object <- texreg::createTexreg(coef.names = coef.names,
                                    coef = coef,
                                    se = se,
                                    pvalues = pvalues)
  return(tr_object)
}

v1_tex <- extract_broom(v1_data)
v2_tex <- extract_broom(v2_data)
v3_tex <- extract_broom(v3_data)
v4_tex <- extract_broom(v4_data)
v5_tex <- extract_broom(v5_data)
v6_tex <- extract_broom(v6_data)
v7_tex <- extract_broom(v7_data)

texreg(list(v1_tex, v2_tex, v3_tex, v4_tex, v5_tex, v6_tex, v7_tex), 
       head.tag = TRUE, body.tag = TRUE,
       digits = 2,
       custom.coef.names = c("(Intercept)",
                             "Age (centered)",
                             "Children",
                             "Education(centered)",
                             "Income quintiles (centered)",
                             "Sex",
                             "unemployed",
                             "Inequality (centered)",
                             "CSO repression (centered)",
                             "GDP pc log (centered)",
                             "Previos Dem. Experience (centered)", 
                             "Regime Support Group (centered)",
                             "Income * Inequality",
                             "Income * CSO repression"),
       booktabs = TRUE,
       use.packages = FALSE,
       caption = "Multiple Impuation: Predicting Civil Society Participation",
       fontsize = "scriptsize",
       label = "Table:E8", 
       longtable = TRUE)

####Manually fill additional information regression ####

## Prediting AIC; BIC; and Log Likelihood##

## Function written by Andrew Heiss 
##(https://www.andrewheiss.com/blog/2018/03/08/amelia-broom-huxtable/)

tidy.melded <- function(x, conf.int = FALSE, conf.level = 0.95) {
  # Get the df from one of the models
  model_degrees_freedom <- glance(x[[1]])$df.residual
  
  # Create matrices of the estimates and standard errors
  params <- data_frame(models = x) %>%
    mutate(m = 1:n(),
           tidied = models %>% map(tidy)) %>% 
    unnest(tidied) %>%
    select(m, term, estimate, std.error) %>%
    gather(key, value, estimate, std.error) %>%
    mutate(term = fct_inorder(term)) %>%  # Order the terms so that spread() keeps them in order
    spread(term, value)
  
  just_coefs <- params %>% filter(key == "estimate") %>% select(-m, -key)
  just_ses <- params %>% filter(key == "std.error") %>% select(-m, -key)
  
  # Meld the coefficients with Rubin's rules
  coefs_melded <- mi.meld(just_coefs, just_ses)
  
  # Create tidy output
  output <- as.data.frame(cbind(t(coefs_melded$q.mi),
                                t(coefs_melded$se.mi))) %>%
    magrittr::set_colnames(c("estimate", "std.error")) %>%
    mutate(term = rownames(.)) %>%
    select(term, everything()) %>%
    mutate(statistic = estimate / std.error,
           p.value = 2 * pt(abs(statistic), model_degrees_freedom, lower.tail = FALSE))
  
  # Add confidence intervals if needed
  if (conf.int & conf.level) {
    # Convert conf.level to tail values (0.025 when it's 0.95)
    a <- (1 - conf.level) / 2
    
    output <- output %>% 
      mutate(conf.low = estimate + std.error * qt(a, model_degrees_freedom),
             conf.high = estimate + std.error * qt((1 - a), model_degrees_freedom))
  }
  
  # tidy objects only have a data.frame class, not tbl_df or anything else
  class(output) <- "data.frame"
  output
}

glance.melded <- function(x) {
  # Because the properly melded parameters and the simple average of the
  # parameters of these models are roughly the same (see
  # https://www.andrewheiss.com/blog/2018/03/07/amelia-tidy-melding/), for the
  # sake of simplicty we just take the average here
  output <- data_frame(models = x) %>%
    mutate(glance = models %>% map(glance)) %>%
    unnest(glance) %>%
    summarize_at(vars(logLik, AIC, BIC),
                 funs(mean)) %>%
    mutate(m = as.integer(length(x)))
  
  # glance objects only have a data.frame class, not tbl_df or anything else
  class(output) <- "data.frame"
  output
}

nobs.melded <- function(x, ...) {
  # Take the number of observations from the first model
  nobs(x[[1]])
}

####

# Extract the models into a vector and make it a "melded" class
v1_imputed <- v1$model
v2_imputed <- v2$model
v3_imputed <- v3$model
v4_imputed <- v4$model
v5_imputed <- v5$model
v6_imputed <- v6$model
v7_imputed <- v7$model


v1_modelfit <- glance.melded(v1_imputed)
v2_modelfit <- glance.melded(v2_imputed)
v3_modelfit <- glance.melded(v3_imputed)
v4_modelfit <- glance.melded(v4_imputed)
v5_modelfit <- glance.melded(v5_imputed)
v6_modelfit <- glance.melded(v6_imputed)
v7_modelfit <- glance.melded(v7_imputed)

modelfits <- v1_modelfit %>%
  bind_rows(v2_modelfit, v3_modelfit, v4_modelfit, v5_modelfit, v6_modelfit, v7_modelfit)
modelfits


